********************************************************************************************
*** MASTER FILE: Collective bargaining for women: How unions can create female friendly jobs
********************************************************************************************
//TOTAL RUNTIME: 284,736.56 (about 80 hours)

* Seed for Stata's random-number generator, set once for the whole run.
set seed 12345

* INSTRUCTIONS: modify to reflect directory
*   Machine-specific literals: CODEPATH, HOME, MATLABPATH, RPATH.
*   Every other global below is derived from those.
*   All globals use the copy form (global name "text", no equals sign): the
*   values are plain strings, so expression evaluation is not needed.
*   When building Windows paths keep the macro BEFORE the backslash (as done
*   here); Stata treats a backslash placed directly before a macro as an escape.
global CODEPATH "E:\project_CBFW\codes"
global HOME "E:\project_CBFW"
global raw "$HOME\raw_data"
global logs "$HOME\log_files"
global files "$HOME\int_files"
global figures "$HOME\results_graphs"
global tables "$HOME\results_tables"

* Directory globals pointing at the pageranks code folder and at the
* log / intermediate-file folders defined above.
* NOTE(review): presumably read by the AKM / PageRank code run from
* b02_VALUE_0_MASTER -- confirm against that do-file.
global DO_DIR "$CODEPATH\pageranks"
global LOG_DIR "$logs"
global WRITE_DIR "$files"
global TEMP_DIR "$files"
global RESULTS_DIR "$files"
global MATLABBGLPATH "$CODEPATH\pageranks"

//to call Matlab via shell
global MATLABPATH "C:/Program Files/MATLAB/R2019a/bin/matlab"
//to call R via shell
global RPATH "C:/Program Files/R/R-4.3.1/bin/Rscript.exe"

//need to edit paths on the following Matlab codes
// 1) "$DO_DIR\VALUE_2_AKM.m" lines 50 and 54
// 2) "$DO_DIR\VALUE_3_PAGERANKS.m" lines 23 and 27

* INSTRUCTIONS: modify to ensure packages from _SETUP.do are available
*   PLUS and PERSONAL are both pointed at the same package folder, after which
*   the Mata library index is rebuilt so that .mlib files found on the updated
*   search path are picked up.
*   Both paths are fully quoted (opening AND closing double quote).
sysdir set PLUS "E:\stata_packages"
sysdir set PERSONAL "E:\stata_packages"
mata: mata mlib index

* INSTRUCTIONS: modify number of cores to use as desired (see n_cpus)
* NOTE(review): n_cpus is only defined here; presumably it is read by the
* do-files / external programs called further down -- confirm where it is used.
global n_cpus = 8

* --- data handling ------------------------------------------------------
* new variables are stored as double unless a type is given explicitly
set type double
* permit import of large .xlsx workbooks
set excelxlsxlargefile on
* variable names may be abbreviated
set varabbrev on

* --- memory -------------------------------------------------------------
* maximum matrix size
set matsize 11000
* size of the memory segments Stata allocates for data
set segmentsize 1.5g
* low niceness: Stata is slow to hand freed memory back to the operating system
set niceness 1

* --- output -------------------------------------------------------------
* graphs are drawn on screen
set graphics on
* no return message (timing / return code) after each command
set rmsg off
* width of output lines in the Results window and logs
set linesize 100

* Files already in the directory (not from raw)
//"$files/rais/`state'/`state'`year'_RAIS"
//"$files/unions/contracts_cnes.dta"
//"$files/unions/act.dta"
//"$files/unions/database_prepanel.dta"
//"$files/genderBR/repids_gender.csv"
//"$files/genderBR/firstnames_SDSR_g.csv"
//"$files/genderBR/firstnames_boards_0921_g.csv"

	
* ======================================================================
*  Cleaning
* ======================================================================
//runtime: 94,768.71
//
// The steps documented below are executed, in the order listed, by the loop
// at the end of this section. The working directory is reset to $CODEPATH
// before every step.
//
// a01_Create_worker_year_panel
//     Creates a worker-year panel from year-state RAIS files
//     input:  year-state RAIS files, microregions_municipality_concordance
//     output: rais_20102017, matleave, workeryear_panel
//
// a02_Estab_basic_info
//     Creates establishment level data sets with basic info
//     (industry, microregion, totemployment by year, etc)
//     input:  workeryear_panel, microregions_municipality_concordance,
//             contracts_cnes, rais_20102017
//     output: estabchars_wide_full, estabchars_wide, shdaysleave, shdaysleavefem
//
// a03_Cleaning_CNES
//     Cleans union basic info from CNES; gender of union representatives;
//     contracts from CNES (at contract, and pair-year level)
//     input:  Sindicatos_Regiao_`region'; Federacoes; Confederacoes.xlsx;
//             AfericaoCentral_2012-2016; _reps; repids_gender;
//             contracts_cnes; estabchars_wide
//     output: union_list_clean; CBA_reps; contracts_act;
//             estab_union_act; filledpanel
//
// a04_Clean_scrapedSDSR
//     Cleans local union boards information (scraped SD SR files)
//     input:  scraped_SD_SR\output_* ; union_list_clean; estab_union_act;
//             firstnames_SDSR_g
//     output: SD_scraped; SR_scraped; SDSR_gender; firstnames_SDSR;
//             SRSD_scraped; unionboards_SDSR_clean; union_boards_panel_20112019
//
// a05_Clean_national_directorates
//     Cleans representatives of national union central boards
//     input:  Dirigentes_CentraisSindicais_final; firstnames_boards_0921_g
//     output: firstnames_boards_0921; National_boards_centrals;
//             National_boards_centrals_names; National_boards_centrals_collapsed
//
// a06_Link_amenities_rais
//     Creates one single do file that creates list of establishments in our
//     amenities sample, tagging also single estab firms. This is used to link
//     our amenities analysis to the RAIS analysis.
//     input:  estab_union_act, contracts_cnes, contracts_act, act,
//             estabchars_wide, rais_20102017,
//             rais_estabid_fakeid_link_20200819_combined
//     output: estabtreat_forRAIS_deid; estabtreat_forRAIS_singest_deid
//
// a07_Create_incumbent_sample
//     Creates incumbent panel from worker-year panel
//     input:  workeryear_panel, estabtreat_forRAIS_deid,
//             microregions_municipality_concordance
//     output: incumbent_panel
//
// a08_Create_allworkers_sample
//     Creates subset of worker-year panel with spells in our subset of
//     establishments
//     input:  workeryear_panel; estabtreat_forRAIS_deid
//     output: quitsnewhires
//
// a09_Create_fullworkers_sample
//     Creates subset of full RAIS data with spells in our subset of
//     establishments
//     input:  rais_20102017; estabtreat_forRAIS_deid; quitsnewhires
//     output: quitsnewhires_full
//
// a10_Create_estlevel_dataset
//     Create dataset for analyzing establishment-level outcomes
//     (main worker spells)
//     input:  quitsnewhires
//     output: estlevel_dataset
//
// a11_Create_estlevel_dataset_full
//     Create dataset for analyzing establishment-level outcomes
//     (all worker spells)
//     input:  quitsnewhires_full
//     output: estlevel_dataset_full
//
// a12_Create_inclevel_dataset
//     Create dataset for analyzing outcomes of incumbents
//     input:  incumbent_panel
//     output: inclevel_dataset

	local cleaning_steps                      ///
		a01_Create_worker_year_panel          ///
		a02_Estab_basic_info                  ///
		a03_Cleaning_CNES                     ///
		a04_Clean_scrapedSDSR                 ///
		a05_Clean_national_directorates       ///
		a06_Link_amenities_rais               ///
		a07_Create_incumbent_sample           ///
		a08_Create_allworkers_sample          ///
		a09_Create_fullworkers_sample         ///
		a10_Create_estlevel_dataset           ///
		a11_Create_estlevel_dataset_full      ///
		a12_Create_inclevel_dataset

	foreach step of local cleaning_steps {
		// reset the working directory first: a previous step may have moved it
		cd "$CODEPATH"
		do `step'
	}

	
* ======================================================================
*  PageRank Values
* ======================================================================
//runtime: 154,987.66
//
// Steps run by the loop below, in this order, each from $CODEPATH.
//
// b01_Prep_rais_ranks
//     Create year files for all Brazil to use for estimating PageRank values
//     input:  clean data files 2009-2016, e.g., RR2009_RAIS,...,RR2016_RAIS
//     output: year-specific master file, e.g., cleanYYYY
//
// b02_VALUE_0_MASTER
//     Master file for running full code for AKM and PageRanks.
//     [Original code based on Morchio and Moser (2024) NBER WP#32408]
//     input:  year-specific master file, e.g., cleanYYYY
//     output: pageranks_gender_2009_2016, akm_XXX_gender_2009_2016,
//             lset_gender_2009_2016, emp_gender_2009_2016,
//             connected_XXX_2009_2016

	local pagerank_steps          ///
		b01_Prep_rais_ranks       ///
		b02_VALUE_0_MASTER

	foreach step of local pagerank_steps {
		// reset the working directory first: a previous step may have moved it
		cd "$CODEPATH"
		do `step'
	}

	
* ======================================================================
*  Analysis: classifying amenities
* ======================================================================
//runtime: 3,420.08
//
// Steps run by the loop below, in this order, each from $CODEPATH.
//
// c01_Create_amenities_sample
//     Build sample for data-driven classification of clauses
//     input:  pageranks_gender_2009_2016, akm_fe_gender_2009_2016,
//             database_prepanel, contracts_cnes.dta,
//             union_category, union_list_clean
//     output: amenities_gender_sample
//
// c02_Create_amenities_classification
//     Obtain the gendered amenity classifications and add them to the
//     analysis samples
//     input:    amenities_gender_sample, clauselabels, estab_union_act,
//               filledpanel
//     output:   amenity_value_gender, amenities_classification_DD
//     modifies: estab_union_act and filledpanel
//
// c03_Create_amenities_classification_ufind2
//     Obtain the gendered amenity classifications and add them to the
//     analysis samples (with industry and state fixed effects)
//     input:  amenities_gender_sample, clauselabels
//     output: amenity_value_gender_ufind2, amenities_classification_DD_unfind2
//     NOTE(review): "_unfind2" may be a misspelling of "_ufind2" -- verify
//     against the file the do-file actually writes.
//
// c04_Create_amenities_classification_separate
//     Obtain the gendered amenity classifications and add them to the
//     analysis samples (separate by gender)
//     input:  amenities_gender_sample, clauselabels
//     output: amenity_value_gender_fem, amenity_value_gender_mal

	local classification_steps                            ///
		c01_Create_amenities_sample                       ///
		c02_Create_amenities_classification               ///
		c03_Create_amenities_classification_ufind2        ///
		c04_Create_amenities_classification_separate

	foreach step of local classification_steps {
		// reset the working directory first: a previous step may have moved it
		cd "$CODEPATH"
		do `step'
	}

	
* ======================================================================
*  Sample descriptives
* ======================================================================
//runtime: 19.61
//
// Steps run by the loop below, in this order, each from $CODEPATH.
//
// d01_Sample_descriptives
//     Creates figures comparing treated and comparison establishments
//     input:  estabchars_wide, estabtreat_forRAIS_deid, estab_union_act
//     output: size_byTC, industry_alt_by_TC, region_byTC, women_emp_sh_byTC
//
// d02_Sample_descriptives_rais
//     Creates tables comparing the analysis sample to 2014 RAIS establishments
//     input:  estabchars_wide_full, estabtreat_forRAIS_deid, estab_union_act,
//             estlevel_dataset
//     output: Tcomp_rais_sample, T_vs_C_our_samples

	local descriptive_steps               ///
		d01_Sample_descriptives           ///
		d02_Sample_descriptives_rais

	foreach step of local descriptive_steps {
		// reset the working directory first: a previous step may have moved it
		cd "$CODEPATH"
		do `step'
	}
	

* ======================================================================
*  Analysis: amenities sample
* ======================================================================
//runtime: 1,077.01
//
// Analysis of the CUT reform in the amenities sample.
// Steps run by the loop below, in this order, each from $CODEPATH.
//
// e01_amenities_tables
//     input:  estab_union_act; unionboards_SDSR_clean; filledpanel;
//             estabchars_wide; estabtreat_forRAIS_deid;
//             estabtreat_forRAIS_singest_deid; union_boards_panel_20112019
//     output: amenities_tables (sheets: t1, T3_mainDD, T3_ratio_versions,
//             T3alt_mainDD, T3alt_ratio_versions, TA8_mainDD,
//             TA8_ratio_versions, TA9_mainDD, TA9_ratio_versions, TA10_mainDD,
//             TA11_mainDD, T4_heterog, TF9)
//
// e02_amenities_figures
//     input:  National_boards_centrals_collapsed; amenities_gender_sample;
//             estab_union_act; filledpanel; estabtreat_forRAIS_deid;
//             estabchars_wide; union_boards_panel_20112019
//     output: firststage_national_boards_treat; firststage_national_boards_centrals;
//             pagerankdiff_femclause_sectoral; pagerankdiff_femclause_sectoral_DD;
//             number_of_clauses_bysharewomen; number_of_clauses_bysharewomen_DD;
//             any_female_cl_DD_did_DD; female_cl_DD_sh_did_DD; female_clauses_DD_b_did_DD;
//             female_clauses_DD_did_DD; male_cl_DD_sh_did_DD; male_clauses_DD_did_DD;
//             female_clauses_DD_het_by_femxshare_DD; female_cl_DD_sh_het_by_femxshare_DD;
//             female_clauses_DD_het_by_femishare_DD; female_cl_DD_sh_het_by_femishare_DD;
//             female_clauses_DD_FP; female_clauses_DD; male_clauses_DD_FP; female_male_DD_FP;
//             shfem_unionboards_did_restr; womPorVP_unionboards_did_restr

	local amenities_steps         ///
		e01_amenities_tables      ///
		e02_amenities_figures

	foreach step of local amenities_steps {
		// reset the working directory first: a previous step may have moved it
		cd "$CODEPATH"
		do `step'
	}
	

* ======================================================================
*  Analysis: incumbent sample
* ======================================================================
//runtime: 3,845.80
//
// Single step, run from $CODEPATH by the loop below.
//
// e03_inclevel_figures
//     Creates the figures and tables for the analysis of the CUT reform in
//     the incumbent worker sample
//     input:  inclevel_dataset
//     output: atblemp_femcb_did; atblemp_het_by_educ_DD;
//             incumbent_tables (sheets: t5_gender)

	local incumbent_steps e03_inclevel_figures

	foreach step of local incumbent_steps {
		// reset the working directory first: a previous step may have moved it
		cd "$CODEPATH"
		do `step'
	}
	
	
* ======================================================================
*  Analysis: establishment sample
* ======================================================================
//runtime: 1,163.00
//
// Creates the figures and tables for the analysis of the CUT reform in the
// establishment sample.
// Steps run by the loop below, in this order, each from $CODEPATH.
//
// e04_establevel_figures
//     input:  estlevel_dataset, estlevel_dataset_full,
//             microregions_municipality_concordance, shdaysleave,
//             shdaysleavefem
//     output: firmenv; y_ln_tot_did; y_sh_probw1_fem_did; y_ln_wbill_tot_did;
//             y_lnwage_neww_fem_did; y_lnwage_neww_mal_did;
//             establishment_tables (sheets: employment_table, chars_table,
//             wages_table, workdays_table)
//
// e05_wage_adjustments
//     input:  contract_wageadjustment.txt, contracts_cnes, contracts_act,
//             estlevel_dataset, microregions_municipality_concordance
//     output: contract_wageadjustment.dta;
//             establishment_tables (sheets: wageadj_table)
//
// e06_profits
//     input:  orbis_brasil_data_2012_2017, estlevel_dataset
//     output: profits_orbis, establishment tables
//             (Table 6C: profits in log file)

	local establishment_steps      ///
		e04_establevel_figures     ///
		e05_wage_adjustments       ///
		e06_profits

	foreach step of local establishment_steps {
		// reset the working directory first: a previous step may have moved it
		cd "$CODEPATH"
		do `step'
	}


* ======================================================================
*  Heterogeneity analysis and robustness
* ======================================================================
//runtime: 16,930.73
//
// Steps run by the loop below, in this order, each from $CODEPATH.
//
// ---- FIGURES ----
//
// f01_inclevel_het
//     Create figures with incumbent level outcomes by share of women at
//     establishment
//     input:  inclevel_dataset
//     output: atblemp_het_by_femxshare
//
// f02_establevel_het
//     Create figures with establishment level outcomes by share of women at
//     establishment
//     input:  estlevel_dataset, estlevel_dataset_full,
//             microregions_municipality_concordance
//     output: sh_mgmt_fem_mgmt_het_by_femxshare, sh_mlext_het_by_femxshare,
//             sh_mlprot_mleave_het_by_femxshare,
//             y_lnwage_oldw_fem_het_by_femxshare, y_ln_tot_het_by_femxshare
//
// ---- TABLES ----
//
// f03_heterogeneity_femshare
//     Heterogeneity by low representation of women in the workplace vs union
//     input:  filledpanel, unionboards_SDSR_clean, estabchars_wide
//     output: amenities_tables (sheet: TF9_amenities)
//
// Downstream effects by representation of women in the union (f04-f06):
//
// f04_amenities_heterog1
//     input:  filledpanel, unionboards_SDSR_clean, estabchars_wide
//     output: heterog1_tables (sheet: T4_heterog)
//
// f05_inclevel_heterog1
//     input:  inclevel_dataset, unionboards_SDSR_clean
//     output: heterog1_tables (sheet: T4_incfemcball, T4_incmalcball)
//
// f06_establevel_heterog1
//     input:  estlevel_dataset, estlevel_dataset_full,
//             microregions_municipality_concordance, unionboards_SDSR_clean,
//             shdaysleave, shdaysleavefem
//     output: heterog1_tables (sheet: T4_firmenv_heterog, T4_empl_heterog,
//             T4_workd_heterog)
//
// Heterogeneity analysis to explore potential mechanisms (f07-f08):
//
// f07_amenities_heterogx
//     input:  filledpanel, union_category, union_boards_panel_20112019
//     output: heterogx_tables (sheet: T4_heterog)
//
// f08_inclevel_heterogx
//     input:  inclevel_dataset, union_category, union_boards_panel_20112019
//     output: heterogx_tables (sheet: T4_incfemall)
//
// f09_heterogeneity_spillovers
//     Spillover effects at multi-establishment firms
//     input:  estlevel_dataset, inclevel_dataset,
//             microregions_municipality_concordance
//     output: spillovers

	local heterogeneity_steps            ///
		f01_inclevel_het                 ///
		f02_establevel_het               ///
		f03_heterogeneity_femshare       ///
		f04_amenities_heterog1           ///
		f05_inclevel_heterog1            ///
		f06_establevel_heterog1          ///
		f07_amenities_heterogx           ///
		f08_inclevel_heterogx            ///
		f09_heterogeneity_spillovers

	foreach step of local heterogeneity_steps {
		// reset the working directory first: a previous step may have moved it
		cd "$CODEPATH"
		do `step'
	}

	
* ======================================================================
*  Welfare analysis
* ======================================================================
//runtime: 8,523.96
//
// Single step, run from $CODEPATH by the loop below.
//
// g01_welfare
//     Welfare estimation with new regression approach
//     input:  estlevel_dataset
//     output: welfare_data, welfare_FINAL, welfare_robustness

	local welfare_steps g01_welfare

	foreach step of local welfare_steps {
		// reset the working directory first: a previous step may have moved it
		cd "$CODEPATH"
		do `step'
	}

	
